package com.momo.demo10_txt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.momo.utils.LuceneUtils;

/**
 * Indexes the .txt files of a directory with Lucene and searches the resulting index.
 *
 * <p>The class is written as a JUnit test: {@link #init()} opens the index before each test,
 * {@link #createIndex()} builds the index and {@link #searchIndex()} queries it.
 */
public class IndexFile {

    /** File-name suffix that marks a file as indexable. */
    private static final String TXT_SUFFIX = ".txt";

    private Directory directory;

    private String indexPath = "D://lucene/index"; // directory the index files are written to

    private String dirPath = "D://lucene/data"; // directory holding the txt resources

    private IndexWriter indexWriter;

    /**
     * Collects the regular files directly inside {@code dirPath} whose name ends in ".txt".
     *
     * @param dirPath directory to scan (not recursive)
     * @return the matching files; empty when the path is missing, is not a directory,
     *         or cannot be listed
     */
    public List<File> getFileList(String dirPath) {
        List<File> fileList = new ArrayList<File>();
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            // listFiles() returns null for a missing/non-directory path or on an I/O error.
            return fileList;
        }
        for (File file : files) {
            // Skip directories: a folder named "x.txt" cannot be read as text.
            if (file.isFile() && isTxtFile(file.getName())) {
                fileList.add(file);
            }
        }
        return fileList;
    }

    /**
     * Tells whether a file name denotes a txt file.
     *
     * @param fileName file name to test, may be {@code null}
     * @return {@code true} when the name ends with ".txt" and has at least one character
     *         before the suffix; {@code false} otherwise (including for {@code null})
     */
    public boolean isTxtFile(String fileName) {
        // Compare the suffix, not "contains": "a.txt.bak" and "notes.txtx" are not txt files.
        return fileName != null
                && fileName.length() > TXT_SUFFIX.length()
                && fileName.endsWith(TXT_SUFFIX);
    }

    /**
     * Converts a file into a Lucene {@link Document} with the fields
     * {@code filename}, {@code content}, {@code size} and {@code path}.
     *
     * @param file file to convert
     * @return the populated document
     * @throws Exception if the file content cannot be read
     */
    public Document fileToDocument(File file) throws Exception {
        Document document = new Document();
        document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", getFileContent(file), Store.YES, Index.ANALYZED));
        // File.length() is the size of this file; getTotalSpace() would be the whole partition.
        document.add(new Field("size", String.valueOf(file.length()), Store.YES, Index.ANALYZED));
        document.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.NO));
        return document;
    }

    /**
     * Creates an {@link IndexWriter} on the given directory using {@code LuceneUtils.analyzer}.
     *
     * @param dir directory holding the index
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be created
     */
    public IndexWriter getIndexWriter(Directory dir) throws Exception {
        return new IndexWriter(dir, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
    }

    /**
     * Closes the writer opened by {@link #init()}, if any. Safe to call more than once.
     *
     * @throws Exception if closing the writer fails
     */
    public void closeWriter() throws Exception {
        if (indexWriter != null) {
            indexWriter.close();
            indexWriter = null;
        }
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param file file to read
     * @return the file's lines joined with {@code "\n"}; empty string for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    public String getFileContent(File file) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            Reader reader = new InputStreamReader(in, "UTF-8");
            BufferedReader br = new BufferedReader(reader);
            StringBuilder result = new StringBuilder();
            boolean first = true;
            String line;
            // Read each line exactly once; calling readLine() in both the loop condition
            // and the body would silently drop every other line.
            while ((line = br.readLine()) != null) {
                if (!first) {
                    result.append('\n');
                }
                result.append(line);
                first = false;
            }
            return result.toString();
        } finally {
            // Closing the underlying stream releases the file handle even when reading fails.
            in.close();
        }
    }

    /**
     * Opens the index directory and an index writer before each test.
     *
     * @throws IllegalStateException if the index cannot be opened
     */
    @Before
    public void init() {
        try {
            directory = FSDirectory.open(new File(indexPath));
            indexWriter = getIndexWriter(directory);
        } catch (Exception e) {
            // Fail here with the real cause instead of letting a later test hit a null writer.
            throw new IllegalStateException("索引打开异常！", e);
        }
    }

    /**
     * Releases the writer and the directory opened by {@link #init()} after each test.
     *
     * @throws Exception if closing a resource fails
     */
    @After
    public void tearDown() throws Exception {
        try {
            closeWriter();
        } finally {
            if (directory != null) {
                directory.close();
                directory = null;
            }
        }
    }

    /**
     * Builds the index: converts every txt file under the data directory into a document
     * and adds it to the index directory.
     *
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void createIndex() throws Exception {
        List<File> fileList = getFileList(dirPath);
        try {
            for (File file : fileList) {
                Document document = fileToDocument(file);
                indexWriter.addDocument(document);
                System.out.println("path:" + document.get("path"));
                System.out.println("filename:" + document.get("filename"));
                System.out.println("size:" + document.get("size"));
                System.err.println("content:" + document.get("content"));
            }
            // One commit for the whole batch instead of one per document.
            indexWriter.commit();
        } finally {
            closeWriter();
        }
    }

    /**
     * Searches the {@code filename} and {@code content} fields of the index and prints
     * the stored fields of the best 25 hits.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed or executed
     */
    @Test
    public void searchIndex() throws Exception {
        Directory searchDirectory = FSDirectory.open(new File(indexPath));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(searchDirectory);
            try {
                QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30,
                        new String[]{"filename", "content"}, LuceneUtils.analyzer);
                Query query = queryParser.parse("机器人");
                TopDocs topDocs = indexSearcher.search(query, 25);
                int count = topDocs.totalHits; // total number of matching documents
                System.out.println("总的命中数" + count);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;

                // Copy the stored fields of every hit into its own map.
                List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
                for (int i = 0; i < scoreDocs.length; i++) {
                    int index = scoreDocs[i].doc;
                    Document document = indexSearcher.doc(index);
                    System.out.println("当前文件名:" + document.get("filename") + "\t匹配度得分：" + scoreDocs[i].score);
                    // A fresh map per hit; sharing one map would make every list entry
                    // show the values of the last document.
                    Map<String, Object> map = new HashMap<String, Object>();
                    map.put("path", document.get("path"));
                    map.put("filename", document.get("filename"));
                    map.put("content", document.get("content"));
                    map.put("size", document.get("size"));
                    list.add(map);
                }

                // Print what was found.
                for (Map<String, Object> data : list) {
                    System.out.println("文件路径" + data.get("path"));
                    System.out.println("文件名" + data.get("filename"));
                    System.out.println("文件内容" + data.get("content"));
                    System.out.println("文件大小" + data.get("size"));
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            searchDirectory.close();
        }
    }
}
